Plotting the results of Utility Values

In [1]:
!pip install plotly
import math
import random

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
Requirement already satisfied: plotly in /home/wtt/anaconda3/lib/python3.7/site-packages (4.5.4)
Requirement already satisfied: retrying>=1.3.3 in /home/wtt/anaconda3/lib/python3.7/site-packages (from plotly) (1.3.3)
Requirement already satisfied: six in /home/wtt/anaconda3/lib/python3.7/site-packages (from plotly) (1.13.0)

Reading the CSV files

In [2]:
# Load the utility history of every run. The files carry no header row
# (header=None), and each frame is transposed so that it ends up with one row
# per iteration and one column per state.
DF_original_VI = pd.read_csv("results/original_value_iteration_utility_history.csv", header=None).T
DF_original_PI = pd.read_csv("results/original_modified_policy_iteration_utility_history.csv", header=None).T

DF_expt1_VI = pd.read_csv("results/experiment1_value_iteration_utility_history.csv", header=None).T
DF_expt1_PI = pd.read_csv("results/experiment1_modified_policy_iteration_utility_history.csv", header=None).T

DF_expt2_VI = pd.read_csv("results/experiment2_value_iteration_utility_history.csv", header=None).T
DF_expt2_PI = pd.read_csv("results/experiment2_modified_policy_iteration_utility_history.csv", header=None).T

# Lookup table keyed by a running index (0..5). Each value is a tuple of
# (description, DataFrame, color_of_graph, scale); `scale` is the factor the
# later cells square and divide state indices by.
DF_dict = dict(enumerate([
    ("DF-Original (Value Iteration) - 36 States", DF_original_VI, "darkred", 1),
    ("DF-Original (Policy Iteration) - 36 States", DF_original_PI, "red", 1),
    ("DF-Experiment1 (Value Iteration) - 144 States", DF_expt1_VI, "darkgreen", 2),
    ("DF-Experiment1 (Policy Iteration) - 144 States", DF_expt1_PI, "limegreen", 2),
    ("DF-Experiment2 (Value Iteration) - 324 States", DF_expt2_VI, "darkblue", 3),
    ("DF-Experiment2 (Policy Iteration) - 324 States", DF_expt2_PI, "skyblue", 3),
]))

Display the DataFrames

In [3]:
# Show every utility-history frame beneath a heading built from its description.
for description, history, _color, _scale in DF_dict.values():
    heading = "\n> " + description + ":"
    print(heading)
    display(history)
> DF-Original (Value Iteration) - 36 States:
0 1 2 3 4 5 6 7 8 9 ... 26 27 28 29 30 31 32 33 34 35
0 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
1 1.0000 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400 0.0 -1.0000 -0.0400 -0.0400 ... 1.0000 -0.0400 -1.0000 -0.0400 1.0000 -1.0000 -0.0400 1.0000 -0.0400 -0.0400
2 1.9900 0.6490 -0.0790 -0.0790 -0.0790 -0.0790 0.0 -1.0390 -0.1740 -0.0790 ... 1.7841 0.7520 -1.0390 -0.0790 1.8870 -0.4060 0.8470 1.7841 0.6490 -0.0790
3 2.9701 1.4974 0.4489 -0.1180 -0.1180 -0.1180 0.0 -0.5240 -0.2130 -0.1280 ... 2.5713 1.4467 -0.4190 -0.1180 2.7550 0.4141 1.6335 2.5611 1.3343 0.4583
4 3.9404 2.4086 1.1692 0.2911 -0.1570 -0.1570 0.0 0.1373 0.2509 -0.1680 ... 3.3414 2.2156 0.2364 0.2697 3.6133 1.2640 2.4047 3.3222 2.0790 1.0504
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
684 99.8966 98.2900 96.8451 95.4505 94.2091 92.8341 0.0 95.7796 95.4830 94.3491 ... 92.9990 91.7110 89.4450 88.4657 93.2251 90.8145 91.6915 91.7847 90.4634 89.1943
685 99.8976 98.2910 96.8461 95.4515 94.2102 92.8351 0.0 95.7807 95.4841 94.3501 ... 93.0000 91.7121 89.4461 88.4667 93.2261 90.8156 91.6925 91.7857 90.4644 89.1953
686 99.8987 98.2920 96.8472 95.4525 94.2112 92.8361 0.0 95.7817 95.4851 94.3512 ... 93.0010 91.7131 89.4471 88.4678 93.2272 90.8166 91.6935 91.7868 90.4654 89.1964
687 99.8997 98.2930 96.8482 95.4535 94.2122 92.8372 0.0 95.7827 95.4861 94.3522 ... 93.0021 91.7141 89.4481 88.4688 93.2282 90.8176 91.6946 91.7878 90.4664 89.1974
688 99.9007 98.2940 96.8492 95.4545 94.2132 92.8382 0.0 95.7837 95.4871 94.3532 ... 93.0031 91.7151 89.4491 88.4698 93.2292 90.8186 91.6956 91.7888 90.4675 89.1984

689 rows × 36 columns

> DF-Original (Policy Iteration) - 36 States:
0 1 2 3 4 5 6 7 8 9 ... 26 27 28 29 30 31 32 33 34 35
0 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
1 19.9980 16.7777 2.4854 -1.1770 -1.1070 -1.0760 0.0 -2.3740 1.3982 -1.7760 ... 6.8278 -1.8370 -0.8090 -1.0200 14.5490 4.3732 6.9548 12.5990 10.8006 9.2090
2 40.8225 39.2157 37.7693 36.3609 20.7942 21.7219 0.0 36.7038 36.3948 35.1479 ... 28.2738 27.8182 25.8010 25.6718 33.4780 31.1267 28.0398 29.0297 27.7681 26.6449
3 56.2264 54.6198 53.1749 51.7802 50.5389 38.3277 0.0 52.1094 51.8128 50.6789 ... 47.6995 43.8119 41.7008 41.1142 47.8191 45.4679 44.7084 44.7252 43.3863 42.1435
4 67.6206 66.0140 64.5691 63.1745 61.9332 59.6968 0.0 63.5037 63.2071 62.0731 ... 59.7381 58.5331 56.2609 54.8723 58.4273 56.0760 58.2864 58.4196 57.1325 55.8486
5 76.0490 74.4423 72.9975 71.6028 70.3615 68.9865 0.0 71.9320 71.6354 70.5015 ... 69.1404 67.8345 65.5546 64.3483 69.1860 66.7539 67.7952 67.7763 66.4682 65.1857
6 82.2834 80.6768 79.2319 77.8373 76.5959 75.2209 0.0 78.1664 77.8698 76.7359 ... 75.3854 74.0972 71.8310 70.6358 75.6014 73.1895 74.0764 74.1696 72.8482 71.5553
7 86.8950 85.2884 83.8435 82.4489 81.2076 79.8325 0.0 82.7781 82.4815 81.3475 ... 79.9974 78.7094 76.4434 75.4641 80.2230 77.8124 78.6898 78.7830 77.4617 76.1926

8 rows × 36 columns

> DF-Experiment1 (Value Iteration) - 144 States:
0 1 2 3 4 5 6 7 8 9 ... 134 135 136 137 138 139 140 141 142 143
0 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.000
1 1.0000 1.0000 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400 ... -1.0000 -1.0000 -0.0400 -0.0400 1.0000 1.0000 -0.0400 -0.0400 -0.0400 -0.040
2 1.9900 1.9900 0.7441 -0.0790 -0.0790 -0.0790 -0.0790 -0.0790 -0.0790 -0.0790 ... -0.4060 -1.2290 -0.0790 0.7441 1.9900 1.9900 0.7441 -0.0790 -0.0790 -0.079
3 2.9701 2.9701 1.6740 0.5336 -0.1180 -0.1180 -0.1180 -0.1180 -0.1180 -0.1180 ... 0.4957 -1.3060 0.6057 1.6936 2.9599 2.9599 1.6740 0.5336 -0.1180 -0.118
4 3.9404 3.9404 2.6254 1.3737 0.3590 -0.1570 -0.1570 -0.1570 -0.1570 -0.1570 ... 1.4343 -0.7210 1.5096 2.6417 3.9191 3.9172 2.6101 1.3737 0.3590 -0.157
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
684 99.8966 99.8966 98.5678 97.2335 95.9107 94.6021 93.3096 92.0342 90.7874 89.5661 ... 97.2254 94.7542 96.0280 97.1778 98.4549 98.4526 97.1294 95.8038 94.4919 93.197
685 99.8976 99.8976 98.5688 97.2345 95.9117 94.6031 93.3106 92.0352 90.7884 89.5672 ... 97.2264 94.7552 96.0290 97.1788 98.4559 98.4536 97.1304 95.8048 94.4929 93.198
686 99.8987 99.8987 98.5698 97.2355 95.9127 94.6042 93.3116 92.0363 90.7895 89.5682 ... 97.2274 94.7562 96.0300 97.1798 98.4569 98.4546 97.1314 95.8058 94.4939 93.199
687 99.8997 99.8997 98.5708 97.2365 95.9137 94.6052 93.3126 92.0373 90.7905 89.5692 ... 97.2284 94.7572 96.0310 97.1808 98.4579 98.4556 97.1324 95.8068 94.4949 93.200
688 99.9007 99.9007 98.5718 97.2375 95.9147 94.6062 93.3136 92.0383 90.7915 89.5702 ... 97.2294 94.7582 96.0320 97.1818 98.4589 98.4566 97.1334 95.8078 94.4959 93.201

689 rows × 144 columns

> DF-Experiment1 (Policy Iteration) - 144 States:
0 1 2 3 4 5 6 7 8 9 ... 134 135 136 137 138 139 140 141 142 143
0 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
1 14.7683 1.6568 -1.2520 -1.5980 -1.6350 -1.2810 -1.1300 -1.0750 -1.0540 -1.0450 ... 3.5415 1.8936 1.5329 -1.0820 -3.3410 -5.3220 -6.7900 -2.5230 -1.9680 -1.4720
2 37.2340 37.2340 35.9032 34.4378 1.7332 -1.4330 -1.7760 -1.8120 -1.8110 -1.8110 ... 34.1527 31.6736 30.0807 28.6277 27.0748 27.1675 0.9185 -1.6900 -1.9470 -2.0130
3 53.5725 53.5725 52.2425 50.8980 49.5533 48.2176 46.8954 45.5897 -2.3810 -2.3810 ... 50.5546 48.0756 46.8338 45.5156 45.6116 45.6977 44.4619 43.2140 41.5690 6.5525
4 65.6576 65.6576 64.3287 62.9944 61.6715 60.3625 59.0681 57.7902 56.5414 29.7976 ... 62.7456 60.2744 59.0836 58.2704 59.7960 59.7821 58.4718 57.1677 55.8753 54.5946
5 74.5969 74.5969 73.2680 71.9337 70.6110 69.3029 68.0115 66.7362 65.4894 64.2682 ... 71.7475 69.2763 68.3839 69.8866 70.8273 70.7591 69.4033 68.0602 66.7333 65.4248
6 81.2093 81.2093 79.8805 78.5462 77.2234 75.9152 74.6235 73.3494 72.1036 70.8832 ... 78.4063 75.9351 76.5689 76.8100 77.8928 77.9250 76.6374 75.3397 74.0531 72.7815
7 86.1005 86.1005 84.7717 83.4374 82.1146 80.8063 79.5144 78.2400 76.9939 75.7732 ... 83.3317 80.8605 81.5439 81.8269 83.2183 83.2139 81.8959 80.5776 79.2742 77.9886
8 89.7185 89.7185 88.3897 87.0554 85.7326 84.4243 83.1322 81.8575 80.6112 79.3904 ... 86.9751 84.5039 85.2845 86.1000 87.3080 87.3020 85.9751 84.6460 83.3306 82.0321
9 92.3948 92.3948 91.0660 89.7317 88.4089 87.1005 85.8083 84.5334 83.2870 82.0661 ... 89.6701 87.1989 88.0357 89.0712 90.3045 90.2999 88.9751 87.6479 86.3341 85.0371
10 94.3744 94.3744 93.0456 91.7113 90.3885 89.0800 87.7878 86.5128 85.2663 84.0452 ... 91.6636 89.1924 90.0696 91.2466 92.4960 92.4922 91.1681 89.8415 88.5284 87.2321
11 95.8388 95.8388 94.5099 93.1756 91.8528 90.5443 89.2520 87.9769 86.7303 85.5092 ... 93.1382 90.6670 91.7108 92.8412 94.1010 94.0977 92.7739 91.4477 90.1351 88.8394
12 96.9219 96.9219 95.5931 94.2588 92.9360 91.6275 90.3351 89.0599 87.8133 86.5922 ... 94.2290 91.7578 92.8743 94.0114 95.2779 95.2750 93.9514 92.6254 91.3131 90.0177

13 rows × 144 columns

> DF-Experiment2 (Value Iteration) - 324 States:
0 1 2 3 4 5 6 7 8 9 ... 314 315 316 317 318 319 320 321 322 323
0 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
1 1.0000 1.0000 1.0000 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400 ... -0.0400 1.0000 1.0000 1.0000 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400 -0.0400
2 1.9900 1.9900 1.9900 0.7441 -0.0790 -0.0790 -0.0790 -0.0790 -0.0790 -0.0790 ... 0.7441 1.9900 1.9900 1.9900 0.7441 -0.0790 -0.0790 -0.0790 -0.0790 -0.0790
3 2.9701 2.9701 2.9701 1.6834 0.5336 -0.1180 -0.1180 -0.1180 -0.1180 -0.1180 ... 1.6834 2.9701 2.9701 2.9701 1.6834 0.5336 -0.1180 -0.1180 -0.1180 -0.1180
4 3.9404 3.9404 3.9404 2.6447 1.3989 0.3590 -0.1570 -0.1570 -0.1570 -0.1570 ... 2.6466 3.9394 3.9404 3.9394 2.6447 1.3989 0.3590 -0.1570 -0.1570 -0.1570
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
684 99.8966 99.8966 99.8966 98.5966 97.3079 96.0296 94.7625 93.5073 92.2647 91.0353 ... 98.5631 99.8608 99.8623 99.8603 98.5575 97.2656 95.9841 94.7138 93.4557 92.2105
685 99.8976 99.8976 99.8976 98.5977 97.3090 96.0306 94.7635 93.5083 92.2658 91.0363 ... 98.5642 99.8619 99.8633 99.8613 98.5586 97.2667 95.9851 94.7148 93.4567 92.2115
686 99.8987 99.8987 99.8987 98.5987 97.3100 96.0317 94.7645 93.5094 92.2668 91.0373 ... 98.5652 99.8629 99.8644 99.8624 98.5596 97.2677 95.9861 94.7158 93.4577 92.2125
687 99.8997 99.8997 99.8997 98.5997 97.3110 96.0327 94.7656 93.5104 92.2678 91.0384 ... 98.5662 99.8639 99.8654 99.8634 98.5606 97.2687 95.9871 94.7169 93.4587 92.2135
688 99.9007 99.9007 99.9007 98.6007 97.3120 96.0337 94.7666 93.5114 92.2688 91.0394 ... 98.5672 99.8649 99.8664 99.8644 98.5616 97.2697 95.9881 94.7179 93.4597 92.2145

689 rows × 324 columns

> DF-Experiment2 (Policy Iteration) - 324 States:
0 1 2 3 4 5 6 7 8 9 ... 314 315 316 317 318 319 320 321 322 323
0 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 ... 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
1 21.0312 5.6499 2.7404 -0.0500 -0.7490 -0.7950 -1.2070 -2.4240 -2.5020 -2.4190 ... 18.8126 21.1678 20.8203 21.1072 13.0664 11.8792 10.8249 4.2025 1.6768 2.0744
2 41.9960 41.9960 41.9960 40.5319 39.1264 37.7106 36.2779 34.5494 -1.4460 -1.8140 ... 37.9847 39.3949 40.0966 40.1640 38.9226 37.6874 36.2060 34.7238 30.1129 28.7734
3 57.0983 57.0983 57.0983 55.7984 54.5095 53.2300 51.9595 50.6985 49.4477 48.2083 ... 54.2811 55.5824 55.5821 55.5826 54.2811 52.9931 51.7139 50.3389 49.0065 47.7118
4 68.2656 68.2656 68.2656 66.9657 65.6769 64.3986 63.1315 61.8763 60.6336 59.4039 ... 65.8326 67.1320 67.1337 67.1317 65.8289 64.5370 63.2555 61.9852 60.7270 59.4817
5 76.5261 76.5261 76.5261 75.2261 73.9374 72.6591 71.3920 70.1368 68.8942 67.6648 ... 74.3786 75.6778 75.6795 75.6775 74.3747 73.0828 71.8012 70.5309 69.2728 68.0276
6 82.6363 82.6363 82.6363 81.3363 80.0476 78.7693 77.5022 76.2470 75.0044 73.7750 ... 80.7014 81.9995 82.0010 81.9990 80.6962 79.4043 78.1227 76.8524 75.5943 74.3491
7 87.1561 87.1561 87.1561 85.8561 84.5674 83.2891 82.0220 80.7668 79.5242 78.2948 ... 85.3776 86.6756 86.6771 86.6751 85.3723 84.0804 82.7989 81.5286 80.2704 79.0253
8 90.4994 90.4994 90.4994 89.1994 87.9107 86.6324 85.3652 84.1101 82.8675 81.6381 ... 88.8366 90.1345 90.1360 90.1340 88.8312 87.5394 86.2578 84.9875 83.7294 82.4842
9 92.9724 92.9724 92.9724 91.6724 90.3837 89.1054 87.8382 86.5831 85.3405 84.1111 ... 91.3953 92.6931 92.6946 92.6926 91.3898 90.0980 88.8164 87.5461 86.2880 85.0428
10 94.8017 94.8017 94.8017 93.5017 92.2130 90.9347 89.6675 88.4124 87.1698 85.9403 ... 93.2880 94.5858 94.5872 94.5853 93.2825 91.9906 90.7090 89.4387 88.1806 86.9354
11 96.1548 96.1548 96.1548 94.8548 93.5661 92.2878 91.0207 89.7655 88.5229 87.2935 ... 94.6880 95.9857 95.9872 95.9852 94.6824 93.3905 92.1090 90.8387 89.5806 88.3354
12 97.1557 97.1557 97.1557 95.8557 94.5670 93.2887 92.0216 90.7664 89.5238 88.2944 ... 95.7236 97.0213 97.0228 97.0208 95.7180 94.4261 93.1445 91.8743 90.6161 89.3709
13 97.8961 97.8961 97.8961 96.5961 95.3074 94.0291 92.7619 91.5068 90.2642 89.0347 ... 96.4896 97.7873 97.7888 97.7868 96.4840 95.1921 93.9106 92.6403 91.3821 90.1370
14 98.4437 98.4437 98.4437 97.1437 95.8550 94.5767 93.3096 92.0544 90.8118 89.5824 ... 97.0562 98.3539 98.3554 98.3534 97.0506 95.7588 94.4772 93.2069 91.9488 90.7036

15 rows × 324 columns

Sorted States according to utility values

Highest Utility to Lowest Utility States

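For Experiment 1 and Experiment 2, each state index is divided by the square of the experiment's scale factor (2 and 3 respectively), so that the states are reported using the indices of the original 36-state version.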

In [4]:
# For every run, rank the states by their utility in the last recorded
# iteration (highest first) and print the ranking as scaled-down state indices.
for label, history, _color, scale in DF_dict.values():
    print(label, ":", history.shape)
    # Last row of the history: one utility value per state, kept as a
    # single-column frame so it can be sorted by that column.
    final_utilities = pd.DataFrame(history.iloc[-1, :])
    utility_column = final_utilities.columns[0]
    ranked = final_utilities.sort_values(utility_column, ascending=False)
    # Map each state index to the scaled-down numbering by integer-dividing
    # it by scale**2.
    # NOTE(review): this presumes that scale**2 consecutive state indices
    # belong to the same original state - confirm against how the CSV files
    # were produced.
    cells_per_state = scale ** 2
    scaled_ranking = [int(state // cells_per_state) for state in ranked.index]
    print(scaled_ranking, "\n")
DF-Original (Value Iteration) - 36 States : (689, 36)
[0, 1, 2, 7, 8, 3, 12, 13, 9, 19, 4, 18, 30, 14, 15, 20, 26, 5, 24, 33, 27, 32, 11, 21, 31, 34, 17, 28, 23, 35, 29, 10, 25, 22, 6, 16] 

DF-Original (Policy Iteration) - 36 States : (8, 36)
[0, 1, 2, 7, 8, 3, 12, 13, 9, 19, 4, 18, 30, 14, 15, 20, 26, 5, 24, 33, 27, 32, 11, 21, 31, 34, 17, 28, 23, 35, 29, 10, 25, 22, 6, 16] 

DF-Experiment1 (Value Iteration) - 144 States : (689, 144)
[0, 0, 3, 3, 12, 33, 15, 12, 30, 33, 15, 30, 21, 21, 18, 18, 0, 28, 25, 34, 34, 18, 27, 18, 28, 15, 31, 27, 25, 3, 31, 12, 22, 21, 15, 0, 33, 21, 31, 19, 34, 28, 24, 35, 30, 24, 31, 22, 25, 3, 28, 32, 12, 34, 1, 35, 19, 6, 9, 4, 25, 32, 16, 33, 30, 1, 22, 35, 6, 29, 9, 13, 4, 7, 32, 16, 19, 1, 10, 35, 22, 26, 29, 7, 4, 32, 29, 16, 10, 1, 13, 7, 29, 4, 26, 19, 26, 10, 13, 16, 2, 7, 26, 5, 23, 10, 13, 2, 23, 5, 20, 2, 20, 5, 17, 17, 2, 5, 8, 14, 14, 8, 11, 11, 11, 6, 6, 8, 8, 24, 9, 9, 11, 23, 20, 14, 14, 27, 17, 17, 24, 23, 20, 27] 

DF-Experiment1 (Policy Iteration) - 144 States : (13, 144)
[0, 3, 3, 0, 12, 15, 12, 33, 30, 33, 15, 30, 21, 21, 18, 18, 0, 18, 18, 27, 15, 28, 25, 3, 27, 28, 34, 34, 25, 12, 31, 31, 22, 21, 15, 21, 0, 19, 33, 24, 30, 31, 28, 24, 34, 35, 31, 3, 22, 25, 12, 28, 32, 1, 34, 19, 6, 35, 9, 4, 25, 32, 16, 33, 30, 1, 22, 6, 35, 9, 29, 13, 4, 7, 32, 16, 19, 1, 10, 22, 26, 35, 7, 4, 29, 32, 29, 16, 10, 1, 13, 7, 4, 29, 19, 26, 10, 13, 16, 26, 2, 7, 5, 26, 10, 13, 23, 2, 5, 23, 20, 2, 20, 5, 17, 2, 17, 5, 8, 14, 14, 8, 11, 11, 8, 8, 9, 6, 6, 14, 9, 11, 11, 14, 27, 27, 17, 17, 24, 24, 23, 23, 20, 20] 

DF-Experiment2 (Value Iteration) - 324 States : (689, 324)
[0, 2, 0, 0, 4, 4, 4, 2, 2, 12, 14, 12, 34, 34, 32, 14, 12, 16, 32, 30, 34, 16, 14, 32, 30, 22, 26, 35, 22, 28, 35, 22, 35, 24, 20, 26, 33, 20, 28, 33, 20, 24, 33, 18, 26, 31, 16, 30, 18, 28, 31, 18, 24, 31, 22, 0, 18, 18, 28, 16, 18, 2, 28, 30, 34, 29, 35, 14, 32, 20, 30, 20, 22, 33, 16, 29, 27, 28, 22, 4, 12, 30, 18, 22, 31, 16, 25, 29, 20, 32, 34, 32, 14, 20, 26, 0, 20, 34, 27, 26, 35, 30, 2, 26, 33, 22, 24, 12, 4, 32, 14, 18, 20, 25, 27, 31, 22, 24, 34, 24, 0, 35, 2, 33, 16, 23, 29, 6, 10, 30, 12, 4, 25, 18, 31, 34, 32, 30, 0, 35, 2, 33, 32, 10, 6, 14, 23, 27, 16, 21, 29, 4, 31, 34, 0, 35, 2, 33, 8, 6, 23, 25, 10, 12, 29, 19, 16, 4, 6, 31, 29, 0, 35, 2, 33, 14, 21, 27, 8, 10, 17, 6, 4, 29, 31, 8, 27, 1, 3, 8, 21, 25, 12, 27, 10, 14, 19, 15, 17, 6, 8, 29, 5, 27, 1, 25, 3, 10, 8, 27, 7, 15, 25, 13, 17, 5, 25, 12, 1, 19, 23, 3, 11, 9, 25, 7, 13, 15, 23, 5, 1, 21, 11, 3, 9, 23, 13, 7, 5, 21, 1, 11, 9, 19, 21, 3, 5, 19, 1, 19, 3, 17, 5, 17, 1, 17, 3, 15, 5, 15, 15, 1, 3, 13, 13, 5, 7, 13, 1, 3, 11, 11, 7, 5, 9, 11, 9, 7, 9, 23, 23, 23, 15, 15, 28, 8, 19, 8, 24, 8, 7, 7, 7, 21, 17, 19, 15, 24, 24, 9, 28, 13, 13, 13, 19, 17, 6, 6, 11, 11, 11, 6, 26, 26, 26, 10, 10, 10, 17, 21, 28, 9, 9, 21] 

DF-Experiment2 (Policy Iteration) - 324 States : (15, 324)
[12, 12, 14, 14, 12, 16, 14, 16, 2, 0, 4, 4, 4, 2, 2, 0, 0, 34, 32, 34, 32, 34, 30, 22, 22, 22, 20, 30, 32, 20, 20, 18, 35, 35, 16, 35, 33, 33, 33, 26, 28, 24, 26, 31, 28, 24, 26, 18, 30, 31, 18, 28, 31, 24, 22, 18, 18, 16, 18, 14, 0, 2, 28, 20, 28, 34, 20, 30, 35, 29, 32, 16, 33, 30, 22, 29, 28, 27, 22, 12, 4, 30, 18, 16, 31, 29, 22, 25, 20, 14, 20, 34, 32, 32, 0, 20, 26, 26, 2, 34, 27, 30, 35, 26, 33, 22, 24, 12, 4, 32, 14, 18, 27, 31, 20, 25, 22, 24, 34, 0, 24, 35, 2, 33, 16, 10, 29, 23, 6, 30, 12, 4, 18, 25, 31, 34, 32, 30, 0, 35, 2, 33, 10, 6, 32, 14, 16, 27, 29, 23, 21, 4, 31, 34, 0, 35, 2, 33, 8, 10, 12, 6, 25, 23, 16, 19, 29, 4, 6, 31, 29, 0, 35, 2, 8, 14, 33, 10, 27, 21, 17, 6, 4, 29, 8, 31, 27, 1, 8, 3, 12, 25, 21, 10, 27, 14, 19, 6, 15, 17, 8, 29, 5, 27, 1, 25, 3, 10, 8, 27, 7, 15, 5, 13, 17, 25, 12, 1, 25, 19, 23, 3, 11, 9, 25, 7, 13, 15, 5, 23, 1, 11, 21, 3, 9, 23, 13, 7, 5, 21, 1, 11, 9, 3, 19, 21, 5, 19, 1, 19, 3, 17, 5, 17, 1, 17, 3, 15, 5, 15, 1, 15, 3, 13, 5, 13, 7, 13, 1, 3, 11, 7, 11, 5, 9, 11, 9, 7, 9, 7, 17, 9, 23, 19, 23, 17, 17, 19, 21, 7, 8, 8, 8, 19, 23, 21, 21, 7, 9, 13, 24, 24, 13, 28, 6, 6, 11, 11, 11, 28, 28, 26, 26, 26, 10, 15, 15, 15, 24, 10, 10, 13, 9, 6] 

Graphs - Utility of State against Number of Iterations

In [5]:
# One figure per run: a line for each state showing how its utility evolves
# over the iterations (the row index of the history frame).
for title_text, history, _color, _scale in DF_dict.values():
    state_traces = [
        go.Scatter(
            name="State " + str(state_no),
            x=history.index,
            y=history[state_no],
            mode="lines",
            showlegend=True,
        )
        for state_no in history.columns
    ]
    fig = go.Figure(data=state_traces)
    # Title placed slightly left of the horizontal centre, near the top.
    title_settings = dict(text=title_text, y=0.9, x=0.45, xanchor='center', yanchor='top')
    fig.update_layout(
        title=title_settings,
        xaxis_title="Iterations",
        yaxis_title="Utility of State",
    )
    fig.show()

Graph with Log-x Scale

In [6]:
# Same per-state utility curves as the linear plots, but drawn on a
# logarithmic x-axis so the early iterations are spread out.
# Note: the row index starts at 0, which has no finite position on a log axis.
for title_text, history, _color, _scale in DF_dict.values():
    state_traces = [
        go.Scatter(
            name="State " + str(state_no),
            x=history.index,
            y=history[state_no],
            mode="lines",
            showlegend=True,
        )
        for state_no in history.columns
    ]
    fig = go.Figure(data=state_traces)
    # Title placed slightly left of the horizontal centre, near the top.
    title_settings = dict(text=title_text, y=0.9, x=0.45, xanchor='center', yanchor='top')
    fig.update_layout(
        xaxis_type="log",
        title=title_settings,
        xaxis_title="Iterations",
        yaxis_title="Utility of State",
    )
    fig.show()